## Using MLE to choose the single s that rationalizes forecasts, given beliefs centered at the actual result. 

all_results = read.csv("all_results_05_10_15.csv")

# normalize results first 
parties = c("con", "lab", "ld", "snp", "pc", "ukip", "grn")
all_results$row_sum <- apply(all_results[,parties], 1, sum, na.rm = T)
all_results[,parties] = all_results[,parties]/all_results$row_sum

AR <- merge(all_results[all_results$type == "forecast",c("refno", "year", parties, "row_sum")], all_results[all_results$type == "actual",c("refno", "year", parties, "row_sum")], by = c("refno", "year"), suffixes = c(".forecast", ".actual"), all = T)

row_sum_cutoff <- 80
to_keep <- !is.na(AR$row_sum.actual) & !is.na(AR$row_sum.forecast) & AR$row_sum.actual > row_sum_cutoff & AR$row_sum.forecast > row_sum_cutoff # & AR$how_many_na < 14
AR <- AR[to_keep,]

how.many.na <- function(x){sum(is.na(x))}
AR$how_many_na <- apply(AR[,c(paste0(parties, ".actual"),paste0(parties, ".forecast"))], 1, how.many.na)
# 25 cases have only 3 parties listed! this is where we only have Con Lab and LD in 2005. 



# we want to pass a single vector containing the values at which we want to evaluate the ddirichlet and the parameters. 
# there are some issues with missing values. our rule is: if either is missing, we omit and rescale. 
ddirichlet.with.zeros = function(vec, s){
  # the first half of the parameter vector is the result at which we want to evaluate the dirichlet density. 
  # the second half is the expected result, which we combine with s to get the parameter vector.
  stopifnot(length(vec)%%2 == 0) # just checking that vec is an even number 
  half = length(vec)/2
  x = vec[1:half] # x is the forecasts -- the first half of vec 
  v = vec[(half+1):length(vec)] # expected result is the second half
  to.keep = !is.na(x) & !is.na(v) # keep the vote shares that are not missing in either
  x = x[to.keep]; v = v[to.keep]
  ddirichlet(x/sum(x), s*v/sum(v))  # compute dirichlet density 
} 

ss = seq(5, 200, 1)
# compute density at actual result given expectation at forecast 
# we exclude 2005 because the forecasts we have are not as good for that year. 
this_AR <- AR[AR$year %in% c(2010, 2015),c(paste0(parties, ".actual"), paste0(parties, ".forecast"))]
lls = c()
for(s in ss){
  ll = sum(log(apply(this_AR, 1, ddirichlet.with.zeros, s = s)))
  lls = c(lls, ll)
}

# compute density at forecast result given expectation at actual 
this_AR <- AR[AR$year %in% c(2010, 2015),c(paste0(parties, ".forecast"), paste0(parties, ".actual"))]
lls.2 = c()
for(s in ss){
  ll = sum(log(apply(this_AR, 1, ddirichlet.with.zeros, s = s)))
  lls.2 = c(lls.2, ll)
}

plot(ss, lls, type = "l")
lines(ss, lls.2, lty = 2)

ss[lls == max(lls)]  # 83: rationalizing actual result when expected result is the forecast result
ss[lls.2 == max(lls.2)] # 87: rationalizing forecasts when expected result is the actual result   

